{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "68c0040c-7c2f-44a3-934e-4969b14d1ebf",
   "metadata": {},
   "source": [
    "## The Prompt Completion Format\n",
    "```python\n",
    "    prompt_completion = {\n",
    "        \"prompt\": [\n",
    "            \"prompt1\",\n",
    "            \"prompt2\",\n",
    "        ],\n",
    "        \"completion\": [\n",
    "            \"completion1\",\n",
    "            \"completion2\",\n",
    "        ]\n",
    "    }\n",
    "```\n",
    "### Example: converting a Hugging Face dataset to the Prompt Completion format and uploading it to a Hugging Face repo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "7c19feb1-28e6-4ecb-9545-af1f61fef7b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import Dataset, load_dataset\n",
    "\n",
    "# Request the train split directly instead of indexing the returned DatasetDict.\n",
    "data = load_dataset(\"tatsu-lab/alpaca\", split=\"train\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "960dce00-cc00-479e-9b1b-6d8e2fd089d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'instruction': 'Give three tips for staying healthy.', 'input': '', 'output': '1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \\n2. Exercise regularly to keep your body active and strong. \\n3. Get enough sleep and maintain a consistent sleep schedule.', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nGive three tips for staying healthy.\\n\\n### Response:\\n1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \\n2. Exercise regularly to keep your body active and strong. \\n3. Get enough sleep and maintain a consistent sleep schedule.'}\n"
     ]
    }
   ],
   "source": [
    "# Inspect one raw record to see which fields each row carries.\n",
    "first_record = data[0]\n",
    "print(first_record)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "893c8c1e-ce97-4d13-a718-ecc24fe5a97f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "###Instruction: Give three tips for staying healthy.\n",
      "###Assistant:\n",
      "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n",
      "2. Exercise regularly to keep your body active and strong. \n",
      "3. Get enough sleep and maintain a consistent sleep schedule.\n"
     ]
    }
   ],
   "source": [
    "def build_prompt(record):\n",
    "    \"\"\"Render one instruction record as the text of a training prompt.\n",
    "\n",
    "    The \"###Input:\" section is included only when the record has a\n",
    "    non-blank \"input\" value. Testing for the key alone is not enough:\n",
    "    the key can be present with an empty string, as in data[0].\n",
    "    \"\"\"\n",
    "    context = record.get(\"input\")\n",
    "    if context and context.strip():\n",
    "        return (\n",
    "            f\"###Instruction: {record['instruction']}\\n\"\n",
    "            f\"###Input: {context}\\n\"\n",
    "            \"###Assistant:\"\n",
    "        )\n",
    "    return f\"###Instruction: {record['instruction']}\\n###Assistant:\"\n",
    "\n",
    "\n",
    "prompt_completion = {\"prompt\": [], \"completion\": []}\n",
    "\n",
    "# Append to both lists in lockstep so prompt i always pairs with completion i.\n",
    "for record in data:\n",
    "    prompt_completion[\"prompt\"].append(build_prompt(record))\n",
    "    prompt_completion[\"completion\"].append(record[\"output\"])\n",
    "\n",
    "# Show the first pair as a quick sanity check of the format.\n",
    "print(prompt_completion[\"prompt\"][0])\n",
    "print(prompt_completion[\"completion\"][0])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dd5c509d-5fd5-43b5-b119-6029904ec266",
   "metadata": {},
   "source": [
    "### Publish to a Hugging Face repo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6db95a3f-9bb7-403f-882b-b7e428be7a2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Example of a dataset repo: 'test/test_dataset'\n",
    "HF_DATASET_REPO = \"<HUGGING_FACE_DATASET_REPO>\"\n",
    "\n",
    "# Read the token from the HF_TOKEN environment variable so the secret is never\n",
    "# saved in the notebook. If it is unset, token=None lets push_to_hub fall back\n",
    "# to the token stored locally by `huggingface-cli login`.\n",
    "hf_token = os.environ.get(\"HF_TOKEN\")\n",
    "\n",
    "dataset = Dataset.from_dict(prompt_completion)\n",
    "dataset.push_to_hub(HF_DATASET_REPO, token=hf_token)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
